---
title: "02 Matching -- Children"
author: "Ye Shen, Radhika Tampi, Raj Vatsa"
output: pdf_document
---

# Setup
```{r}
knitr::opts_chunk$set(echo = TRUE)
if(!require(webshot)) {
  install.packages("webshot")
}
library(gt)
library(gtsummary)
library(tidyverse)
library(survey)
library(gridExtra)
library(grid)
library(foreign)
library(webshot)
library(MatchIt)
library(stringr)
webshot::install_phantomjs()
```


# Read in Data
```{r}
cleaned_children_did_alldis <- readRDS("data/cleaned_children_did_alldis.rds")
IHOPE_house_children_alldis <- read.csv("data/merged_IHOPE_house_2014_2016_v3.csv")
```

## Coarsened Exact Matching: by quintile of wealth score
```{r matching}
## Match individual households by wealth score quintile: 
# 1. Subset household data to only households containing children with any disease in both years
IHOPE_house_children_alldis <- IHOPE_house_children_alldis %>%
  subset(ID_menage %in% cleaned_children_did_alldis$ID_menage)

# 2. Create var in household data for intervention/control by household ID (ID_menage)
IHOPE_house_children_alldis$group <- cleaned_children_did_alldis$group[match(IHOPE_house_children_alldis$ID_menage, cleaned_children_did_alldis$ID_menage)]

# 3. Matching Procedure:
  # a) Only match on pre-treatment, so match households in 2014  based on wealth quintiles
  # b) 1:1 match intervention to control

  # Coarsened Exact Matching & explanation of options:
  # k2k: If TRUE nearest neighbor matching without replacement will take place within each stratum, and any unmatched units will be dropped (e.g., if there are more treated than control units in the stratum, the treated units without a match will be dropped)
  # k2k.method: how the distance between units should be calculated if k2k = TRUE

df <- IHOPE_house_children_alldis %>%
  filter(questionnaire_menage_annee == 2014) %>%
  matchit(formula = group ~ menage_score_bien_etre,
          method = "cem", cutpoints = list(menage_score_bien_etre = 5),
          k2k = TRUE, k2k.method = "euclidean")
  #euclidean ok because we are only matching on 1 var, no need to "weight" vars based on effect on dependent var
  #use 5 bins for quintiles (generated automatcially by MatchIt)

summary(df) #summary of matching: standardized mean diff goes from 0.42 to 0.0089 after matching
df <- match.data(df)
df <- df %>%
  arrange(subclass)

# 4. Create dataset of children with any disease from matched households:
children_disease_did <- cleaned_children_did_alldis %>%
  subset(ID_menage %in% df$ID_menage) %>%
  select(ID_menage, enfant_age_annee, year, cluster, wmweight, group, menage_score_bien_etre, ALL.disease, ALL.public_level, ALL.public_communitycenter,ALL.public_healthfacility, ALL.anytreatment, ALL.notreatment)

saveRDS(children_disease_did, "data/match_children.rds")

  rm(df) # remove original matched dataframe from R

# 5. Rough check of matching for 2014 (year == 0) --> maybe create violin plot? 
children_disease_did %>% 
  group_by(year, group) %>%
  select(group, year, menage_score_bien_etre) %>%
  summarize(mean = mean(menage_score_bien_etre), count = n()) %>%
  head()
```


## After matching, assign survey design to datasets
```{r}
svy_children_alldisease_match <- svydesign(
    id = ~cluster, strata = ~group, weights = ~wmweight,
    data = children_disease_did
  )
#subset survey data to 2014 and 2016 for summary stats
  svy_children_alldisease_match_2014 <- subset(svy_children_alldisease_match, year == 0)
  svy_children_alldisease_match_2016 <- subset(svy_children_alldisease_match, year == 1)
```

### Matched DiD regression adjusting for wealth index score
  * Care-seeking Outcome: Access to any public_level care (Visit to a PHF or CHW)
```{r}
#Adjusted DID regression of care-seeking outcome on year/group interaction and wealth score
  did_child_anyvisit_match <- svyglm(ALL.public_level ~ year*group + menage_score_bien_etre, 
                                     design = svy_children_alldisease_match, 
                                     family = gaussian)
```

## Table 2 for children with any illnesses
 * Outcome: Access to any public_level care (Visit to a PHF or CHW)
### Prepare Table 2 for children with any illnesses 
```{r}
table2_children <- 
  as.data.frame(array(data = NA, dim = c(2, 19)))
# "Indicator","Study group", "Denominator", "Numerator", "Percent (SE)", "Difference 
# (p-value)","Denominator", "Numerator", "Percent (SE)", "Difference (p-value)","Percent 
# relative change", "Percent absolute change", "Difference in differences (p-value)"
colnames(table2_children) <- 
  c("indicator", "group", "d_2014", "n_2014", "m_2014", "se_2014", "dif_2014", "p-val_2014", 
    "d_2016", "n_2016", "m_2016", "se_2016", "dif_2016", "p-val_2016", "trends_rel", 
    "trends_abs", "dif.in.dif(man)", "dif.in.dif(coef)", "p-val_did")
table2_children$indicator <- "ALL.public_level"

# Any visit (Access)
## Summary stats for 2014
table2_children[rev(c(1,2)), 2] <- 
    svyby(~ALL.disease, by = ~group, 
          design = svy_children_alldisease_match_2014, FUN = svytotal, 
          keep.var = T, na.rm = T)[, 1]

table2_children[rev(c(1,2)), 3] <- 
    round(coef(svyby(~ALL.disease, by = ~group, 
                     design = svy_children_alldisease_match_2014, 
                     FUN = svytotal, 
          keep.var = T, na.rm = T))) # 'd_2014' :denominator

table2_children[rev(c(1,2)), 4] <- 
    round(coef(svyby(~ALL.public_level, by = ~group, 
                     design = svy_children_alldisease_match_2014, 
                     FUN = svytotal,
          keep.var = T, na.rm = T))) # 'n_2014': numerator

table2_children[rev(c(1,2)), 5] <- 
    round(coef(svyby(~ALL.public_level, by = ~group, 
                     design = svy_children_alldisease_match_2014, 
                     FUN = svymean,
          keep.var = T, na.rm = T))*100,2) # 'm_2014' : average

table2_children[rev(c(1,2)), 6] <- 
    round(SE(svyby(~ALL.public_level, by = ~group, 
                   design = svy_children_alldisease_match_2014, 
                   FUN = svymean,
          keep.var = T, na.rm = T)),2) # 'se_2014' : standard error

table2_children[c(1,2), 7] <- table2_children[1, 5] - table2_children[2, 5]# 'dif_2014'

### Estimate p-value of the 2014 difference with a Chi2 test
  if (table2_children[1, 4] == 0 & 
      table2_children[2, 4] == 0) {
    table2_children[2, 8] <- NA
  } else {
    table2_children[2, 8] <- 
      round(svychisq(~ALL.public_level+group, 
                     design = svy_children_alldisease_match_2014)$p.value, 2)
  }
## Summary stats for 2016
table2_children[rev(c(1,2)), 9] <- 
    round(coef(svyby(~ALL.disease, by = ~group, 
                     design = svy_children_alldisease_match_2016, 
                     FUN = svytotal, 
          keep.var = T, na.rm = T))) # 'd_2016'

table2_children[rev(c(1,2)), 10] <- 
    round(coef(svyby(~ALL.public_level, by = ~group, 
                     design = svy_children_alldisease_match_2016, 
                     FUN = svytotal,
          keep.var = T, na.rm = T))) # 'n_2016'

table2_children[rev(c(1,2)), 11] <- 
    round(coef(svyby(~ALL.public_level, by = ~group, 
                     design = svy_children_alldisease_match_2016, 
                     FUN = svymean,
          keep.var = T, na.rm = T))*100,2) # 'm_2016'

table2_children[rev(c(1,2)), 12] <- 
    round(SE(svyby(~ALL.public_level, by = ~group, 
                   design = svy_children_alldisease_match_2016, 
                   FUN = svymean,
          keep.var = T, na.rm = T)),2) # 'se_2016'

table2_children[c(1,2), 13] <- table2_children[1, 11] - table2_children[2, 11]# 'dif_2016'

### Estimate p-value of the 2016 difference with a Chi2 test
  if (table2_children[1, 10] == 0 & 
      table2_children[2, 10] == 0) {
    table2_children[2, 14] <- NA
  } else {
    table2_children[2, 14] <- 
      round(svychisq(~ALL.public_level+group, 
                     design = svy_children_alldisease_match_2016)$p.value,2)
  }

## Estimate values for trends 2014-2016
  table2_children[rev(c(1, 2)), 15] <- round(((table2_children[rev(c(1, 2)), 11] - 
       table2_children[rev(c(1, 2)), 5]) / table2_children[rev(c(1, 2)), 5])*100,2) # 'trends_rel'
  
  table2_children[rev(c(1, 2)), 16] <- 
    table2_children[rev(c(1, 2)), 11] - table2_children[rev(c(1, 2)), 5] # 'trends_abs'
  
  table2_children[2, 17] <- table2_children[1, 16] - table2_children[2, 16] # 'dif.in.dif(man)'
  
## DiD stats adjusting for wealth
  # 'dif.in.dif(coef)': Coefficient of Difference-in-Differences comparison 
  table2_children[2, 18] <- 
    round(summary(did_child_anyvisit_match)$coefficients[5, 1]*100,2)
  # 'p-val_did': p-value of Difference-in-Differences comparison coefficient
  table2_children[2, 19] <- 
    round(summary(did_child_anyvisit_match)$coefficients[5, 4],2)
```
### Final Table 2 (Children)
```{r}
# Replicate final Table 2
final_table2_disease<-table2_children
final_table2_disease$m_2014 <- sprintf("%.2f (%.2f)", final_table2_disease$m_2014, 
                                       final_table2_disease$se_2014)
final_table2_disease$m_2016 <- sprintf("%.2f (%.2f)", final_table2_disease$m_2016, 
                                       final_table2_disease$se_2016)
final_table2_disease$dif_2014 <- sprintf("%.2f (%.2f)", final_table2_disease$dif_2014, 
                                         final_table2_disease$`p-val_2014`)
final_table2_disease$dif_2016 <- sprintf("%.2f (%.2f)", final_table2_disease$dif_2016, 
                                         final_table2_disease$`p-val_2016`)
# final_table2_disease$`dif.in.dif(man)` <- sprintf("%.2f (%.2f)", 
#                                                   final_table2_disease$`dif.in.dif(man)`, 
#                                                   final_table2_disease$`p-val_did`)
final_table2_disease$`dif.in.dif(coef)` <- sprintf("%.2f (%.2f)",
                                                   final_table2_disease$`dif.in.dif(coef)`,
                                                   final_table2_disease$`p-val_did`)
final_table2_disease$se_2014 <- NULL
final_table2_disease$se_2016 <- NULL
final_table2_disease$`p-val_2014` <- NULL
final_table2_disease$`p-val_2016` <- NULL
final_table2_disease$`dif.in.dif(coef)`[grep("NA", final_table2_disease$`dif.in.dif(coef)`)] <- "--"
final_table2_disease$`p-val_did` <- NULL
final_table2_disease$dif_2014[grep("NA", final_table2_disease$dif_2014)] <- "--"
final_table2_disease$dif_2016[grep("NA", final_table2_disease$dif_2016)] <- "--"
final_table2_disease$`dif.in.dif(man)` <- NULL 

final_table2_disease <- as_tibble(final_table2_disease) %>% 
  select(-c(n_2014, n_2016, indicator)) %>%
  mutate(
    group = case_when(group == 0 ~ "NG", 
                      group == 1 ~ "IG")) %>%
  rename(DiDcoef = `dif.in.dif(coef)`)
  
gt(final_table2_disease ) %>%
  tab_header(title = ("Table S1. Summary statistics of responses and trends on any visit to a PHF or CHW from the 2014 and 2016 IHOPE surveys among all children with any symptoms of diarrhea, fever, and/or coughing with difficulty breathing")) %>%
  tab_style(style = cell_text(align = "left"), location = cells_title("title")) %>%
  tab_spanner(
    label = "2014",
    columns = c(d_2014, m_2014, dif_2014)
  ) %>%
  tab_spanner(
    label = "2016",
    columns = c(d_2016, m_2016, dif_2016)
  ) %>%
  tab_spanner(
    label = "2014-2016 Trend",
    columns = c(trends_rel, trends_abs, DiDcoef)
  ) %>%
  tab_style(style = cell_text(weight = "bold"), 
            location = cells_column_spanners(spanners = everything())) %>%
    cols_label(group = "Study group", d_2014 = "N", m_2014 = "Percent (SE)", 
               dif_2014 = " Difference (p-value)", d_2016 = "N", 
               m_2016 = "Percent (SE)", dif_2016 = " Difference (p-value)", 
               trends_rel = "Percent relative change", 
               trends_abs = "Percent absolute change", DiDcoef = "Adjusted DiD (p-value)") %>%
    tab_style(style = cell_text(weight = "bold"), 
              location = cells_column_labels(columns = everything())) %>%
    gt::gtsave("table_fig/paper/Supplemental/table_s1_child_match.png")
```

### Figure 2: Changes in sick-child care seeking behavior and care content for children (<5 years old) over the first two years of PIVOT-MoH’s HSS intervention
```{r}
# Figure:
# Generate figure based on table
num_var <- 1

fig <- as.data.frame(matrix(nrow = num_var * 4, ncol = 6))

fig$V1 <- round(c(
  table2_children$m_2014, table2_children$m_2016), 2)
fig$V2 <- round(c(
  table2_children$n_2014, table2_children$n_2016), 0)
fig$V3 <- round(c(
  table2_children$d_2014, table2_children$d_2016), 0)
fig$V4 <- rep(c("Intervention", "Non-Intervention"), num_var * 2)
fig$V5 <- rep(c("2014 survey", "2016 survey"), each = num_var * 2)
fig$V6 <- rep("Careseeking for Children", num_var * 4) ### RENAME BASED ON OUTCOME ###
colnames(fig) <- c("mean", "num", "dem", "group", "year", "indicator")
fig$group <- factor(fig$group, levels = c("Non-Intervention", "Intervention"))

fig <- 
ggplot(fig, 
         aes(x = mean, y = group, group = interaction(group, indicator), 
             label = sprintf("%s/%s", num, dem))) +
  geom_point(aes(colour = year), size = 3.5) +
  scale_colour_manual(breaks = fig$year, 
                      values = c("2014 survey" = "black", "2016 survey" = "red")) +
  geom_line() +
  labs(
    x = "Percentage", y = "",
    title = "Matching on Household Wealth Index: \n Any Sick-Child Visits to a PHF or CHW"
  ) +
  theme_bw() +
  scale_x_continuous(limits = c(0, 100), breaks = seq(0, 100, 10), expand = c(0, 0)) +
  scale_y_discrete(position = "right") +
  theme(
    legend.position = "bottom", axis.text = element_text(size = 10), 
    panel.grid.minor = element_blank(), legend.title = element_blank(),
    title = element_text(size = 12, face = "bold"), 
    axis.title = element_text(size = 13),
    legend.text = element_text(size = 12))+
  annotate("text", label="}", x=60, y=1.5, size =20, fontface="plain")+
  annotate("text", label=paste0("Adjusted DiD (p-val): \n",final_table2_disease$DiDcoef[2]), x=75, y=1.5)

saveRDS(fig,"table_fig/child/fig_child_match.rds")
ggsave("table_fig/child/fig2_match.png")
```

## Balance before and after Matching 

```{r}
child_notmatch <- readRDS("data/cleaned_children_did_alldis.rds") %>%
    select(ID_menage, enfant_age_annee, year, cluster, wmweight, group, 
           menage_score_bien_etre, ALL.disease, ALL.public_level, 
           ALL.public_communitycenter,ALL.public_healthfacility, 
           ALL.anytreatment, ALL.notreatment) %>%
    mutate(group_year=as.factor(case_when(
    group==1&year==0 ~ 1, #Treatment in pre-period: Group 1 in Stuart et al 2014
    group==1&year==1 ~ 2, #Treatment in post-period: Group 2 in Stuart et al 2014
    group==0&year==0 ~ 3, #Control in pre-period: Group 3 in Stuart et al 2014
    group==0&year==1 ~ 4 #Control in post-period: Group 4 in Stuart et al 2014
  ))) %>%
  mutate(match = 0) 

child_match<-readRDS("data/match_children.rds")%>%
  mutate(group_year=as.factor(case_when(
    group==1&year==0 ~ 1, #Treatment in pre-period: Group 1 in Stuart et al 2014
    group==1&year==1 ~ 2, #Treatment in post-period: Group 2 in Stuart et al 2014
    group==0&year==0 ~ 3, #Control in pre-period: Group 3 in Stuart et al 2014
    group==0&year==1 ~ 4 #Control in post-period: Group 4 in Stuart et al 2014
  ))) %>%
  mutate(match = 1)

child_full <- rbind(child_match, child_notmatch)
child_full$match <- factor(child_full$match, levels = c("0", "1"),
                  labels = c("Unmatched", "Matched")
                  )

child_full %>%
  ggplot() +
  geom_density(aes(menage_score_bien_etre, fill = group_year), alpha = 0.4, adjust = 1.5) +
  labs(x = "Wealth Index Score", 
       title = "Fig. 2 Balance Before and After Matching: \n Households with Children with Any Illness", 
       fill = "Treatment and Period") +
  scale_fill_discrete(labels=c("1"="Treatment Preperiod","2"="Treatment Postperiod","3"="Control Preperiod","4"="Control Postperiod")) +
  theme_bw()+
  theme(plot.title = element_text(size=14, face= "bold"), 
        axis.line = element_line(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_blank()) +
  facet_wrap(~match)
ggsave("table_fig/paper/fig2_matching_balance_child.png", width=8, height=4, dpi=300)
```

